package ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP;

/*
 * Copyright Antonio Gomariz Peñalver 2013
 *
 * This file is part of the SPMF DATA MINING SOFTWARE
 * (http://www.philippe-fournier-viger.com/spmf).
 *
 * SPMF is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * SPMF. If not, see <http://www.gnu.org/licenses/>.
 */
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.*;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.creators.AbstractionCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.savers.Saver;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.savers.SaverIntoFile;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.savers.SaverIntoMemory;
import ca.pfv.spmf.tools.MemoryLogger;
import java.io.IOException;
import java.util.BitSet;
import java.util.Map;

/**
 * This is an implementation of the PrefixSpan algorithm by Antonio Gomariz
 * Peñalver(AGP). PrefixSpan was proposed by Pei et al. 2001. This algorithm was
 * inspired in the implementation of SPMF and, from it, I changed and optimized
 * some things. <br/>
 * <br/>
 * 
 * NOTE: This implementation saves the pattern to a file as soon as they are
 * found or can keep the pattern into memory if no output path is provided by
 * the user.
 * 
 * @author Antonio Gomariz Peñalver
 */
public class AlgoPrefixSpan_AGP {

	/**
	 * the minimum support threshold
	 */
	protected double minSup;
	/**
	 * The minimum support relative threshold, i.e. the minimum number of
	 * sequences where the patterns have to be
	 */
	protected double relativeMinSupport;
	/**
	 * original sequential database to be used for sequential patterns
	 * extraction
	 */
	protected SequenceDatabase originalDataset;
	/**
	 * Saver variable to decide where the user want to save the results, if it
	 * the case
	 */
	Saver saver = null;
	/**
	 * Start and end points in order to calculate the overall time taken by the
	 * algorithm
	 */
	protected long start, end;
	/**
	 * The abstraction creator
	 */
	private AbstractionCreator abstractionCreator;
	/**
	 * Number of frequent patterns found by the algorithm
	 */
	private int numberOfFrequentPatterns = 0;

	/**
	 * Standard constructor. It takes the minimum support threshold (from 1 up
	 * to 0) and an abstraction creator
	 * 
	 * @param support
	 *            the minimum absolute support threshold
	 * @param creator
	 *            the abstraction creator
	 */
	public AlgoPrefixSpan_AGP(double support, AbstractionCreator creator) {
		this.minSup = support;
		this.abstractionCreator = creator;
	}

	/**
	 * Method that starts the execution of the algorithm.
	 * 
	 * @param database
	 *            The original database in which we apply PrefixSpan
	 * @param keepPatterns
	 *            Flag indicating if the user want to keep the frequent patterns
	 *            or he just want the amount of them
	 * @param verbose
	 *            Flag for debugging purposes
	 * @param outputFilePath
	 *            Path pointing out to the file where the output, composed of
	 *            frequent patterns, has to be kept. If, conversely, this
	 *            parameter is null, we understand that the user wants the
	 *            output in the main memory
	 * @throws IOException
	 */
	public void runAlgorithm(SequenceDatabase database, boolean keepPatterns,
			boolean verbose, String outputFilePath) throws IOException {
		// calculation of the minimum relative support
		relativeMinSupport = (int) (database.size() * minSup);
		if (this.relativeMinSupport == 0) { // protection
			this.relativeMinSupport = 1;
		}
		// reset the stats about memory usage
		MemoryLogger.getInstance().reset();
		// keeping the starting time
		start = System.currentTimeMillis();
		// Starting PrefixSpanAlgorithm
		prefixSpan(database, keepPatterns, verbose, outputFilePath);
		// keeping the ending time
		end = System.currentTimeMillis();
		// Search for frequent patterns: Finished
		saver.finish();
	}

	/**
	 * Method that executes the first steps before calling the actual main
	 * method of PrefixSpan. In particular, the original database is fully
	 * converted to a pseudosequece database, removing the infrequent items that
	 * appeared in the original dabase
	 * 
	 * @param database
	 *            The original database
	 * @param keepPatterns
	 *            Flag indicating if the user want to keep the frequent patterns
	 *            or he just want the amount of them
	 * @param verbose
	 *            Flag for debugging purposes
	 * @param outputFilePath
	 *            Path pointing out to the file where the output, composed of
	 *            frequent patterns, has to be kept. If, conversely, this
	 *            parameter is null, we understand that the user wants the
	 *            output in the main memory
	 * @throws IOException
	 */
	protected void prefixSpan(SequenceDatabase database, boolean keepPatterns,
			boolean verbose, String outputFilePath) throws IOException {
		// If we do no have any file path
		if (outputFilePath == null) {
			// The user wants to save the results in memory
			saver = new SaverIntoMemory();
		} else {
			// Otherwise, the user wants to save them in the given file
			saver = new SaverIntoFile(outputFilePath);
		}
		/*
		 * We get the map which relates the frequent items with their
		 * appearances in the database
		 */
		Map<Item, BitSet> mapSequenceID = database.getFrequentItems();
		/*
		 * projection of the original database in order to obtain a
		 * pseudosequence database
		 */
		PseudoSequenceDatabase pseudoDatabase = projectInitialDatabase(
				database, mapSequenceID, (int) relativeMinSupport);
		// We initialize the class that is in charge of managing the main loop
		// of PrefixSpan
		RecursionPrefixSpan_AGP algorithm = new RecursionPrefixSpan_AGP(
				abstractionCreator, saver, (int) relativeMinSupport,
				pseudoDatabase, mapSequenceID);
		// And we execute the actual algorithm
		algorithm.execute(keepPatterns, verbose);
		// Finally we update the number of frequent patterns that we found
		numberOfFrequentPatterns = algorithm.numberOfFrequentPatterns();
		// check the memory usage for statistics
		MemoryLogger.getInstance().checkMemory();
	}

	/**
	 * Method to get the outlined information about the search for frequent
	 * sequences by means of PrefixSpan algorithm as a string.
	 * 
	 * @return a string
	 */
	public String printStatistics() {
		StringBuilder sb = new StringBuilder(200);
		sb.append("=============  Algorithm - STATISTICS =============\n Total time ~ ");
		sb.append(getRunningTime());
		sb.append(" ms\n");
		sb.append(" Frequent sequences count : ");
		sb.append(numberOfFrequentPatterns);
		sb.append('\n');
		sb.append(" Max memory (mb):");
		sb.append(MemoryLogger.getInstance().getMaxMemory());
		sb.append('\n');
		sb.append(saver.print());
		sb.append('\n');
		sb.append("===================================================\n");
		return sb.toString();
	}

	/**
	 * Get the number of frequent patterns found.
	 * 
	 * @return the number of frequent patterns.
	 */
	public int getNumberOfFrequentPatterns() {
		return numberOfFrequentPatterns;
	}

	/**
	 * It gets the time spent by the algoritm in its execution.
	 * 
	 * @return the time spent (long).
	 */
	public long getRunningTime() {
		return (end - start);
	}

	/**
	 * It gets the minimum relative support, i.e. the minimum number of database
	 * sequences where a pattern has to appear
	 * 
	 * @return the minimum support (double)
	 */
	public double getRelativeMinSupport() {
		return relativeMinSupport;
	}

	/**
	 * It projects the initial database converting each original sequence to
	 * pseudosequences in order to enable the later pseudoprojections in the
	 * main loop of PrefixSpan
	 * 
	 * @param database
	 *            The original Database
	 * @param mapSequenceID
	 *            Map with all the items appearing in the original database, and
	 *            a bitset pointing out in which sequences the items appear
	 * @param minSupport
	 *            The minimum relative support
	 * @return
	 */
	private PseudoSequenceDatabase projectInitialDatabase(
			SequenceDatabase database, Map<Item, BitSet> mapSequenceID,
			long minSupport) {
		PseudoSequenceDatabase initialContext = new PseudoSequenceDatabase();
		// For each database sequence
		for (Sequence sequence : database.getSequences()) {
			// The new pseudosequences are optimized, since do not have the
			// infrequent items
			Sequence optimizedSequence = sequence.cloneSequenceMinusItems(
					mapSequenceID, minSupport);
			if (optimizedSequence.size() != 0) {
				/*
				 * If after remove the infrequent items, we remove all the items
				 * of an original sequence, we insert an empty pseudosequence in
				 * order not to affect to the minimum relative support
				 */
				PseudoSequence pseudoSequence = new PseudoSequence(0,
						optimizedSequence, 0, 0);
				initialContext.addSequence(pseudoSequence);
			}
		}
		return initialContext;
	}

	/**
	 * It clears all the attributes of AlgoPrefixSpan class
	 */
	public void clear() {
		if (originalDataset != null) {
			originalDataset.clear();
			originalDataset = null;
		}
		if (saver != null) {
			saver.clear();
			saver = null;
		}
		abstractionCreator = null;
	}
}
